** Reset the Stata session before building the Phase 2 work file
* Close any log left open by a previous run (no error if none is open)
capture log close
* Drop data, value labels, programs, etc. held in memory
clear all
* Fix the random-number seed
set seed 1234
* Never pause output; NOTE: "permanently" also stores this preference for
* future Stata sessions on this machine
set more off, permanently

**********************************************************************************************************************************************************
*************************************************************** Importing / Checking *********************************************************************	
**********************************************************************************************************************************************************

******************************************
********** Phase 2 dataset  **************
******************************************

* Build the stacked Phase 2 work file from the yearly raw files.
*
* Every file "Original data/V<year>_Fase2.txt" goes through exactly the same
* preparation, so a single loop handles all years (previously the 2008 code
* was a hand-maintained copy of the 2000-2007 loop body).
* 2008 is processed first and seeds the work file; 2000-2007 are then prepared
* one at a time and appended to whatever has been saved so far. After the loop
* the full stacked dataset is both in memory and on disk.
local subjects port biol quim hist fisi geog mate lest
local workfile "Work data/work_data_P2.dta"

foreach yr of numlist 2008 2000/2007 {

    clear
    insheet using "Original data/V`yr'_Fase2.txt"
    gen year=`yr'
    rename inscri inscription
    sum

    * Identify missing answers in Phase 2 questions.
    * Flag = 0/1 only for rows with pre_<subject>=="P"; 1 means the item is
    * missing. Stored as byte (values are only 0, 1 or .) to save memory.
    foreach subj of local subjects {
        forvalues q = 1/12 {
            gen byte missing_p2_`subj'`q'=0 if pre_`subj'=="P"
            replace missing_p2_`subj'`q'=1 if pre_`subj'=="P" & `subj'`q'==.
            tab year, sum(missing_p2_`subj'`q')
            * Missing values only in years from 2000 to 2002:
            * the flag is blanked for every later year
            replace missing_p2_`subj'`q'=. if year>2002
            tab year, sum(missing_p2_`subj'`q')
        }
    }

    /* Turn 0s into missing values for individuals who did not take the test
       (pre_<subject>=="A") and missing values into 0s for those who took
       the test (pre_<subject>=="P"). */
    * NOTE(review): this loop runs over items 1-13 while the flags above and
    * the totals below use items 1-12 only - confirm the raw files really
    * carry a 13th item for every subject and that it should stay out of the
    * totals.
    foreach subj of local subjects {
        forvalues q = 1/13 {
            replace `subj'`q'=0 if `subj'`q'==. & pre_`subj'=="P"
            replace `subj'`q'=. if `subj'`q'==0 & pre_`subj'=="A"
        }
    }
    replace aptd=. if aptd==0 & pre_aptd=="A"

    * Subject totals over items 1-12; the total is missing as soon as any of
    * the twelve items is missing.
    foreach subj of local subjects {
        gen `subj'_tot=`subj'1+`subj'2+`subj'3+`subj'4+`subj'5+`subj'6+`subj'7+`subj'8+`subj'9+`subj'10+`subj'11+`subj'12
    }

    if `yr' == 2008 {
        * First year processed: it starts the work file
        compress
        sort inscription
    }
    else {
        * Later years: stack the years already saved under the current one
        append using "`workfile'"
        sort year inscription
    }
    save "`workfile'", replace
}

* Recode curso 31 as 62, then give the Phase 2 variables their "_st2" names.
replace curso = 62 if curso == 31

rename area  area_st2
rename curso career_st2
rename grupo group_st2

* Parallel lists: the k-th original subject stem maps to the k-th new stem.
local stems_old port biol quim hist fisi geog mate lest
local stems_new port biol chem hist phy  geog math lang

forvalues k = 1/8 {
    local old : word `k' of `stems_old'
    local new : word `k' of `stems_new'

    * Subject total
    rename `old'_tot `new'_tot_st2

    * Missing-answer flags keep their name unless the subject stem changes
    * (quim, fisi, mate, lest)
    if "`old'" != "`new'" {
        forvalues q = 1/12 {
            rename missing_p2_`old'`q' missing_p2_`new'`q'
        }
    }
}

* Replace each string marker pre_<subject> ("P"/"A") with a labelled numeric
* variable <subject>_attn_st2: 1 for "P", 2 for "A", missing otherwise.
local stems_old port biol quim hist fisi geog mate lest aptd
local stems_new port biol chem hist phy  geog math lang aptd

forvalues k = 1/9 {
    local old : word `k' of `stems_old'
    local new : word `k' of `stems_new'

    generate pre_`old'2 = cond(pre_`old'=="P", 1, cond(pre_`old'=="A", 2, .))

    * One value label per subject, named after the original string variable
    label define pre_`old' 1 "1:P" 2 "2:A"
    label values pre_`old'2 pre_`old'

    * The string version is no longer needed
    drop pre_`old'

    rename pre_`old'2 `new'_attn_st2
}

rename aptd aptd_st2

* Rename the item-level variables (items 1-12 of each subject) to
* <new stem><item>_st2, using parallel lists of old and new subject stems.
local stems_old port biol quim hist fisi geog mate lest
local stems_new port biol chem hist phy  geog math lang

forvalues k = 1/8 {
    local old : word `k' of `stems_old'
    local new : word `k' of `stems_new'

    forvalues q = 1/12 {
        rename `old'`q' `new'`q'_st2
    }
}

* For 2002-2008, subtract <last digit of year> * 10,000,000 from inscription.
* NOTE(review): presumably the raw inscription number carries that year digit
* as a leading prefix in those files - confirm against the raw data.
forvalues d = 2/8 {
    local offset = `d' * 10000000
    replace inscription = inscription - `offset' if year == 200`d'
}

* Year-qualified identifier: full year followed by the inscription number.
* Stored as double so that all digits are kept exactly.
generate double inscri2 = year * 10000000 + inscription
format %13.0g inscri2

save "Work data/work_data_P2.dta", replace